library(p8105.datasets)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Load the NYC restaurant inspection data shipped with p8105.datasets.
data("rest_inspec")

# Animated box plot of inspection scores by borough, with one animation
# frame per inspection year (2014 onward).
rest_inspec %>%
  janitor::clean_names() %>%
  # Use `=` here, not `<-`: `mutate(x <- ...)` adds a new column named after
  # the whole expression and leaves `inspection_date` unconverted, so the
  # date cutoff below would not be applied as intended.
  mutate(inspection_date = as.Date(inspection_date)) %>%
  filter(
    inspection_date >= as.Date("2014-01-01"),
    # `score` is numeric, so missing values are NA rather than "Missing".
    !is.na(score),
    boro != "Missing"
  ) %>%
  mutate(year = year(inspection_date)) %>%
  plot_ly(
    x = ~boro, y = ~score, type = "box",
    color = ~boro, frame = ~year, alpha = 0.5
  ) %>%
  layout(
    title = "The Distribution of Inspection Score",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Each Restaurant's Score")
  )
# Box plot, by borough, of each restaurant's mean inspection score over
# inspections dated 2015-01-01 or later.
rest_inspec %>%
  janitor::clean_names() %>%
  # Use `=` here, not `<-`: `mutate(x <- ...)` adds a new column named after
  # the whole expression and leaves `inspection_date` unconverted, so the
  # date cutoff below would not be applied as intended.
  mutate(inspection_date = as.Date(inspection_date)) %>%
  filter(
    inspection_date >= as.Date("2015-01-01"),
    # `score` is numeric, so missing values are NA rather than "Missing".
    !is.na(score),
    boro != "Missing"
  ) %>%
  # One row per restaurant (`camis`) within a borough; drop the grouping so
  # nothing downstream inherits it.
  group_by(camis, boro) %>%
  summarise(score_per_rest_by_boro = mean(score), .groups = "drop") %>%
  plot_ly(
    x = ~boro, y = ~score_per_rest_by_boro, type = "box",
    color = ~boro, alpha = 0.5
  ) %>%
  layout(
    title = "The Distribution of Inspection Score",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Each Restaurant's Mean Score (2015-2017)")
  )
## `summarise()` has grouped output by 'camis'. You can override using the `.groups` argument.
# Dodged bar chart of how many inspections fall into each score-derived
# grade band, per borough. `frame = year` is carried along as an aesthetic
# so the plot can be animated by year when converted with ggplotly().
grade_plot_by_boro <- rest_inspec %>%
  janitor::clean_names() %>%
  # Use `=` here, not `<-`: `mutate(x <- ...)` adds a new column named after
  # the whole expression and leaves `inspection_date` unconverted, so the
  # date cutoff below would not be applied as intended.
  mutate(inspection_date = as.Date(inspection_date)) %>%
  filter(
    inspection_date >= as.Date("2014-01-01"),
    # `score` is numeric: `score >= 0` is NA for missing scores, so those
    # rows are dropped here together with negative scores.
    score >= 0,
    boro != "Missing"
  ) %>%
  mutate(
    year = year(inspection_date),
    # Grade is recomputed from the score bands below; the dataset's own
    # `grade` column is not used.
    Grade = case_when(
      score <= 13 ~ "A",
      score <= 27 ~ "B",
      TRUE ~ "C"
    )
  ) %>%
  # count() returns an ungrouped tibble, one row per boro/Grade/year.
  count(boro, Grade, year, name = "Count") %>%
  ggplot(aes(boro, Count, fill = Grade, frame = year)) +
  geom_col(position = position_dodge2())
## `summarise()` has grouped output by 'boro', 'grade'. You can override using the `.groups` argument.
# Convert the ggplot object into an interactive plotly widget.
grade_plot_by_boro %>% ggplotly()
# Tabulate how often each violation code occurs, most frequent first.
# count() takes the grouping column directly and returns an ungrouped
# tibble; spell out TRUE because `T` is an ordinary, reassignable variable.
rest_inspec %>%
  janitor::clean_names() %>%
  count(violation_code, sort = TRUE)
## # A tibble: 100 × 2
## # Groups:   violation_code [100]
##    violation_code     n
##    <chr>          <int>
##  1 10F            55228
##  2 08A            40258
##  3 04L            27594
##  4 02G            27235
##  5 06C            26135
##  6 06D            25912
##  7 10B            22365
##  8 04N            20128
##  9 02B            19562
## 10 04M             8952
## # … with 90 more rows
# Line chart of monthly counts for violation codes 08A and 04L.
rest_inspec %>%
  filter(violation_code %in% c("08A", "04L")) %>%
  # Year-month label as text, e.g. "2016-02"; used as the x axis.
  mutate(Date = format(inspection_date, "%Y-%m")) %>%
  # count() replaces group_by() + summarise(n()) + ungroup() and returns an
  # ungrouped tibble without the grouping message.
  count(Date, violation_code, name = "count") %>%
  # Fix the level order (08A first) and attach readable labels in one step.
  mutate(
    violation_code = factor(
      violation_code,
      levels = c("08A", "04L"),
      labels = c("Facility not vermin proof", "Evidence of mice or live mice")
    )
  ) %>%
  plot_ly(
    x = ~Date, y = ~count,
    color = ~violation_code, type = "scatter", mode = "lines"
  )
## `summarise()` has grouped output by 'Date'. You can override using the `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# List the column names of the inspection data.
names(rest_inspec)
##  [1] "action"                "boro"                  "building"             
##  [4] "camis"                 "critical_flag"         "cuisine_description"  
##  [7] "dba"                   "inspection_date"       "inspection_type"      
## [10] "phone"                 "record_date"           "score"                
## [13] "street"                "violation_code"        "violation_description"
## [16] "zipcode"               "grade"                 "grade_date"
# Print a per-column summary of the inspection data, grouped by variable type.
skimr::skim(rest_inspec)
Data summary
Name rest_inspec
Number of rows 397584
Number of columns 18
_______________________
Column type frequency:
character 12
numeric 3
POSIXct 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
action 1117 1.00 32 131 0 5 0
boro 0 1.00 5 13 0 6 0
building 130 1.00 1 10 0 7267 0
critical_flag 0 1.00 8 14 0 3 0
cuisine_description 0 1.00 4 64 0 85 0
dba 334 1.00 2 86 0 21018 0
inspection_type 1117 1.00 25 59 0 34 0
phone 3 1.00 10 12 0 25261 0
street 5 1.00 3 40 0 3326 0
violation_code 6316 0.98 3 3 0 99 0
violation_description 6890 0.98 27 360 0 93 0
grade 201937 0.49 1 14 0 6 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
camis 0 1.00 44534756.00 4277136.67 30075445 41227319 41622444 50011150 50071063 ▁▁▇▁▅
score 22642 0.94 18.93 13.00 -2 11 15 24 151 ▇▂▁▁▁
zipcode 5 1.00 10675.34 598.72 10001 10022 10468 11229 11697 ▇▂▁▆▃

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
inspection_date 0 1.00 1900-01-01 00:00:00 2017-10-17 00:00:00 2016-02-03 00:00:00 1420
record_date 0 1.00 2017-10-19 06:00:49 2017-10-19 06:00:59 2017-10-19 06:00:49 3
grade_date 204287 0.49 2012-05-01 00:00:00 2017-10-17 00:00:00 2016-02-17 00:00:00 1331